/// So far we have cleaned the users. This .do file merges in coverage and balance activity and creates new datasets at the user-action level (first purchase, coverage, and balance actions)

********************************************************************************
*** Merge Balance Data (includes refills and utilization of days of coverage)
use "data/int/hugo clean.dta", clear
merge 1:m id using "data/inputs/hugo/new_balance.dta", gen(_m_balance) keep(1 3)

* check that users who took up insurance have balance activity 
* and those who did not do not:
assert _m_balance == 3 if bound == 1
assert _m_balance == 1 if bound == 0

*keep the bound users data
sort id timestamp

*because some actions are recorded at the same time, we reorder actions to match ledger
*at each point, the next closest timestamp with a previous balance that matches your new balance is
*the correct next index

*wherever there is duplicate mindex, start with the observation before;
*match the newbal to the mindex duplicate with the same prevbal'
*that match gets the value of mindex, and we add one to the other mindices

bysort id timestamp: gen nval = _n
sum nval
local numrep = r(max) - 2

*strat by sorting and creating a first action index
sort id timestamp balanceindays prevbal newbal amount
by id: gen action_index = _n

*get the min and max index for any duplicate timestamps
sort id action_index
bysort id timestamp: egen mindex = min(action_index)
bysort id timestamp: egen maxdex = max(action_index)

*make a new index so that we're not altering the original we created
gen new_index = action_index

*give those initial binds an index of 0; we know they're first
replace new_index = 0 if source == "initial_bind" & prevbal == 0
replace mindex = 0 if source == "initial_bind" & prevbal == 0

*now we match the newbal of the last non-duplicate with the prevbal among the duplicates
	*find out where there are duplicates
	bysort id mindex: gen dubmin = _N >= 2
	*get the newbal of the last observation before duplicates
	gen minnew = newbal if dubmin[_n+1] == 1 & dubmin == 0
	*get this value expanded to all those duplicates for checking
		gen minnew2 = minnew[_n-1]
		drop minnew
		bysort id mindex: egen minnew = mean(minnew2)
		drop minnew2
		sort id mindex

*reshuffle to match previous and next balance
replace new_index = mindex if abs(prevbal - minnew) < 0.01
replace new_index = maxdex if abs(prevbal - minnew) > 0.01 & !mi(minnew)

sort id new_index

*repeat the procedure now that we've indexed
forval i = 1/`numrep' {
	drop dubmin minnew
	*because the non-matcher duplicates now have the max index, we can sort here by index
	bysort id new_index: gen dubmin = _N >= 2
	*same as above to get our match with the observation we matched above as our new "known newbal"
		gen minnew = newbal if dubmin[_n+1] == 1 & dubmin == 0 | mi(dubmin)
		gen minnew2 = minnew[_n-1]
		drop minnew
		bysort id mindex: egen minnew = mean(minnew2) if !mi(dubmin)
		drop minnew2

	*replace the duplicate that matched with mindex + i, as mindex + i - 1 is already taken
	replace new_index = mindex + `i' if abs(prevbal - minnew) < 0.01
}
	
sort id new_index
	
*check that prevbal == newbal
*round again
replace newbal = round(newbal, .01)
replace prevbal = round(prevbal, .01)
gen newprevbal = newbal[_n-1] if id == id[_n-1]
replace newprevbal = 0 if source == "initial_bind"
gen prevdiff = newprevbal - prevbal

*confirm that our action order is correct:
assert abs(prevdiff) < 0.01 if !mi(prevdiff)

sort id new_index

bysort id: gen action_order = _n
gen order_transactions = action_order

drop nval mindex-prevdiff

*confirm purchases are 3, 7, 14, or 30 days, plus 10 days at bind
gen prevres = reservebalanceindays[_n-1] if id == id[_n-1]
gen paydays = amount/dayrate if type == "add_balance"
replace paydays = paydays - 10 if source == "initial_bind"
replace paydays = paydays - (10 - prevres) if !mi(prevres)
replace paydays = round(paydays, 1)
tab paydays, mi 

assert !mi(paydays) if type=="add_balance"
assert inlist(paydays, 3, 7, 14, 30) | mi(paydays) if daily==1

********************************************************************************
*** infer number of days purchased from reserve balance and balance change

sort id action_order
by id: gen prev_reservedays = reservebalanceindays[_n-1]
by id: gen prev2_reservedays = reservebalanceindays[_n-2]
by id: gen prev_balanceindays = balanceindays[_n-1]

* Define number of days purchased
gen amount_days = round(amount / dayrate, 1) if type=="add_balance"
	replace amount_days = amount_days - 10 if source=="initial_bind" & type=="add_balance"
	replace amount_days = 90 if cohort=="DAILY_CONTROL" & type=="add_balance"
	replace amount_days = (amount_days-(10-prev_reservedays)) if ///
		type=="add_balance" & source!="initial_bind"

gen amount_days_plus_res = amount_days + (10-prev_reservedays) if ///
	type=="add_balance" & !missing(prev_reservedays)

*change amount to amount paid for initial bind
gen amount_paid = dayrate * amount_days if ///
	type=="add_balance" & source=="initial_bind"
		// Number of days times day rate (no grace days)
replace amount_paid = (24/30) * dayrate * amount_days if ///
	bundle==1 & amount_days==30 & type=="add_balance" & source=="initial_bind" 
		// Add bundle discount
replace amount_paid = (12/14) * dayrate * amount_days if ///
	bundle==1 & amount_days==14 & type=="add_balance" & source=="initial_bind"
		// Add bundle discount
replace amount_paid = dayrate * amount_days_plus_res if ///
	type=="add_balance" & source!="initial_bind"
		// Days + repaying grace days
replace amount_paid = (24/30) * dayrate * amount_days + dayrate*(10-prev_reservedays) if ///
	bundle==1 & amount_days==30 & type=="add_balance" & source!="initial_bind"
		// Add bundle discount
replace amount_paid = (12/14) * dayrate * amount_days + dayrate*(10-prev_reservedays) if ///
	bundle==1 & amount_days==14 & type=="add_balance" & source!="initial_bind"
		// Add bundle discount
replace amount_paid = amount if ///
	cohort=="DAILY_CONTROL" & type=="add_balance"
		// Assign total amount paid for controls 
replace amount_paid = round(amount_paid,.01) // Round so we have an amount to match

********************************************************************************
*** counterfactual dayrate

*participants in the lower and higher price cohorts face 20% price changes
gen basedayrate = dayrate if price == "base"
replace basedayrate = dayrate/0.80 if price == "low"
replace basedayrate = dayrate/1.20 if price == "high"

********************************************************************************
*** get date limits for two- and three- month timeframes

*get first and last days and format timestamp
sort id action_order
gen firstday = timestamp_bind
replace firstday = timestamp_usercreated if mi(timestamp_bind)
format firstday %tc
assert !mi(firstday)
bysort id: egen firstpay1 = min(timestamp) if type == "pay_coverage"
bysort id: egen firstpay = mode(firstpay1)
format firstpay %tc

*check first day verus first pay
assert firstday <= firstpay if !mi(firstday) & !mi(firstpay)
gen dayfrompaytoplay = (firstpay - firstday)/(1000*60*60*24)

*code up date limits for two- and three-month timeframes
gen timesincestart = (timestamp - firstday)/(1000*60*60*24)

local nths = "nth nthcr"
foreach nth of local nths {
	gen tempfirst = firstday
	if "`nth'"=="nthcr" replace tempfirst = timestamp_usercreated
	gen firstmonth = month(dofc(tempfirst))
	gen firstdayday = day(dofc(tempfirst))
	gen firstdayyear = year(dofc(tempfirst))

	gen hr = hh(tempfirst)
	gen min = mm(tempfirst)
	gen sec = ss(tempfirst)

	gen twomo`nth' = cofd(mdy(firstmonth+2,firstdayday,firstdayyear))
	replace twomo`nth' = cofd(mdy(firstmonth+2-12,firstdayday,firstdayyear+1)) if firstmonth>=11
	replace twomo`nth' = twomo`nth' + hr*60*60*1000 + min*60*1000 + sec*1000
	format twomo`nth' %tc
	gen threemo`nth' = cofd(mdy(firstmonth+3,firstdayday,firstdayyear))
	replace threemo`nth' = cofd(mdy(firstmonth+3-12,firstdayday,firstdayyear+1)) if firstmonth>=10
	replace threemo`nth' = cofd(mdy(firstmonth+4,firstdayday-30,firstdayyear)) if ///
		firstmonth==3 & firstdayday==31
	replace threemo`nth' = threemo`nth' + hr*60*60*1000 + min*60*1000 + sec*1000
	format threemo`nth' %tc
	assert !mi(twomo`nth') if !mi(firstmonth)
	assert !mi(threemo`nth') if !mi(firstmonth)
	drop tempfirst firstmonth firstdayday firstdayyear hr min sec
}

********************************************************************************
*** analyze individual purchases

*define initial purchase
gen firstpur = amount_paid if source == "initial_bind"
bysort id: egen firstpurchase = mean(firstpur)

gen initialbinddays = firstpurchase/dayrate

*define made second deposit, size of second deposit
gen ispayment = 1 if type == "add_balance" & ///
	inlist(source, "autopay", "manual_sms", "manual_web", "initial_bind")
sort id ispayment action_order
by id ispayment: gen payorder = _n if ispayment == 1
assert source == "initial_bind" if payorder == 1

bysort id: egen payments = max(payorder)
bysort id: egen tempmeanpay = mean(amount_paid) if ispayment == 1
bysort id: egen meanpay = median(tempmeanpay)
drop tempmeanpay

quietly sum payments
local maxpayments = r(max)

sort id action_order

forval i=1/`maxpayments' {
	gen tempayment`i' = amount_paid if payorder == `i'
	gen tempdays`i' = amount_paid/dayrate if payorder == `i'
	replace tempdays`i' = tempdays`i' - outstandingbalanceindays[_n-1] if ///
		payorder == `i' & payorder > 1
	gen tempprevbal`i' = prev_balanceindays if payorder == `i'
	gen tempprevres`i' = prev_reservedays if payorder == `i'
	bysort id: egen payment`i' = median(tempayment`i')
	bysort id: egen paydays`i' = median(tempdays`i')
	bysort id: egen prevbal`i' = median(tempprevbal`i')
	bysort id: egen prevres`i' = median(tempprevres`i')
	drop tempayment`i' tempdays`i' tempprevbal`i' tempprevres`i'
	gen prevall`i' = prevbal`i'+prevres`i'
	
	replace paydays`i' = round(paydays`i')
	gen bundled`i' = .
	replace bundled`i' = 1 if paydays`i' >= 12 & !mi(paydays`i') & cohort != "DAILY_CONTROL"
	replace bundled`i' = 0 if paydays`i' < 12 & !mi(paydays`i') & cohort != "DAILY_CONTROL"
}

drop ispayment
drop payorder

********************************************************************************
*** save data

*this dataset has all users who did anything and all their actions
save "data/cleaned/hugo/All balance actions.dta", replace

*keep only coverage actions
preserve
	keep if type == "pay_coverage"

	save "data/cleaned/hugo/All insurance actions.dta", replace
restore
	

beep
